#Code to create measure of dyadic representation in Senate
#Created by Mac Lockhart, Feb 16 2023

library(tidyverse)
library(readr)
library(fuzzyjoin)
library(car)
library(stringr)
library(maps)
library(data.table)
library(cdlTools)
setwd(rprojroot::find_root(rprojroot::is_rstudio_project))
getwd()
df <- fread("Intermediate Data/merged_cces_rollcall.csv")
df<-tibble(df)
link <- readxl::read_xlsx("Intermediate Data/policy_outcomes_and_notes.xlsx")
link<-link[!is.na(link$data_num),]

results <- tibble(df$X)
Sens <- tibble(df$X)

votes<- c(paste0("V", 1:111))
#unique combination of data_num and data_year
for(vote in votes){
  years <- unique(link[link$variable_name_senate==vote,]$data_year)
  years<-years[!is.na(years)]
  for(year in years){
    rcv <- link[link$variable_name_senate==vote & link$data_year==year & 
                  !is.na(link$data_num),]$data_num
    rcv<-rcv[!is.na(rcv)]
    if(length(rcv)==1){
      vote_1<-paste0("1_",vote, ".s")
      vote_2<-paste0("2_",vote, ".s")
      vote_3<-paste0("3_",vote, ".s")
      vote_4<-paste0("4_",vote, ".s")
      vote_5<-paste0("5_",vote, ".s")
      vote_6<-paste0("6_",vote, ".s")
      vote_7<-paste0("7_",vote)
      vote_8<-paste0("8_",vote)
      vote_9<-paste0("9_",vote)
      view <- paste0("rcv",rcv,"_r")
      #identify respondents view using RCV which is the CCES vote number
        result<-rowSums(cbind((df[,vote_1]==df[, view]), (df[,vote_2]==df[, view]), 
                  (df[,vote_3]==df[, view]), (df[,vote_4]==df[, view]),
                  (df[,vote_5]==df[, view]), (df[,vote_6]==df[, view]),
                  (df[,vote_7]==df[, view]), (df[,vote_8]==df[, view]),
                  (df[,vote_9]==df[, view])), na.rm=T)
        #identifies all 4 potential vote outcomes
        v1 <- ifelse(!is.na(df[,vote_1]), (df[,vote_1]==df[, view]), NA) 
        v2 <- ifelse(!is.na(df[,vote_2]), (df[,vote_2]==df[, view]), NA)
        v3 <- ifelse(!is.na(df[,vote_3]), (df[,vote_3]==df[, view]), NA)
        v4 <- ifelse(!is.na(df[,vote_4]), (df[,vote_4]==df[, view]), NA)
        v5 <- ifelse(!is.na(df[,vote_5]), (df[,vote_5]==df[, view]), NA)
        v6 <- ifelse(!is.na(df[,vote_6]), (df[,vote_6]==df[, view]), NA)
        v7 <- ifelse(!is.na(df[,vote_7]), (df[,vote_7]==df[, view]), NA)
        v8 <- ifelse(!is.na(df[,vote_8]), (df[,vote_8]==df[, view]), NA)
        v9 <- ifelse(!is.na(df[,vote_9]), (df[,vote_9]==df[, view]), NA)
        #picks both non missing values from 4 possible votes
        vote1<-coalesce(v1, v2, v3, v4, v5, v6, v7, v8, v9)
        vote2<-coalesce(v9, v8, v7, v6, v5, v4, v3, v2, v1)
        result <- cbind(tibble(vote1), tibble(vote2))
        rm(vote1, vote2, v1, v2, v3, v4, v5, v6, v7, v8, v9)
        #repeats process above but just identifies Senators who cast the above votes
        s1 <- ifelse(!is.na(df[,vote_1]), df$Sen1, NA)
        s2 <- ifelse(!is.na(df[,vote_2]), df$Sen2, NA)
        s3 <- ifelse(!is.na(df[,vote_3]), df$Sen3, NA)
        s4 <- ifelse(!is.na(df[,vote_4]), df$Sen4, NA)
        s5 <- ifelse(!is.na(df[,vote_5]), df$Sen5, NA)
        s6 <- ifelse(!is.na(df[,vote_6]), df$Sen6, NA)
        s7 <- ifelse(!is.na(df[,vote_7]), df$Sen7, NA)
        s8 <- ifelse(!is.na(df[,vote_8]), df$Sen8, NA)
        s9 <- ifelse(!is.na(df[,vote_9]), df$Sen9, NA)
        sen1<-coalesce(s1, s2, s3, s4, s5, s6, s7, s8, s9)
        sen2<-coalesce(s9, s8, s7, s6, s5, s4, s3, s2, s1)
        sen <- cbind(tibble(sen1), tibble(sen2))
        rm(sen1, sen2, s1, s2, s3, s4, s5, s6, s7, s8, s9)
      
      #cleans data to merge 
      sen[df$year!=year, 1]<-NA
      sen[df$year!=year, 2]<-NA
      sen <- tibble(sen)
      colnames(sen)<-c(paste0(vote, year, "_sen1"), paste0(vote, year, "_sen2"))
      Sens<-cbind(Sens, sen)
      
      result[df$year!=year,1]<-NA
      result[df$year!=year,2]<-NA
      result <- tibble(result)
      colnames(result)<-c(paste0(vote, year, "_vote1"), paste0(vote, year, "_vote2"))
      results<-cbind(results, result)
      n<-ncol(results)
      print(sum(result[,1]==1, na.rm=T))
      result<-NULL
    }
    print(year)}
}

results<-tibble(results)
results<-results %>%
  mutate(Total = select(., `V352009_vote1`:`V1112010_vote2`) %>% rowSums(na.rm = TRUE))

table(results$Total, df$year)

results %>%
  select(where(function(x) any(!is.na(x))))

#Make the table long rather than wide (i.e. per vote)
DT <- as.data.table(results)
DT<-DT[,which(unlist(lapply(DT, function(x)!all(is.na(x))))),with=F]
data_long <- gather(DT, vote, outcome, `V352009_vote1`:`V1112010_vote2`, factor_key=TRUE)
colnames(data_long)<-c("X", "total", "vote", "outcome")
data_long$vote <- gsub("vote", "", data_long$vote) #changed from string


#Make the table long rather than wide (i.e. per vote)
DT_Sens <- as.data.table(Sens)
DT_Sens<-DT_Sens[,which(unlist(lapply(DT_Sens, function(x)!all(is.na(x))))),with=F]
data_long_Sens <- gather(DT_Sens, vote, outcome, `V352009_sen1`:`V1112010_sen2`, factor_key=TRUE)
colnames(data_long_Sens)<-c("X", "vote", "Sen")
data_long_Sens$vote <- gsub("sen", "", data_long_Sens$vote) #changed from string

data_long<-left_join(data_long, data_long_Sens, by=c("X"="X", "vote" = "vote"))

#Merge back in individual variables from df
merge <- link %>% mutate(resp_vote_ID = paste0("rcv", data_num, "_r"),
                        dat_vote_ID = paste0(variable_name_senate, data_year)) %>%  
                  select(resp_vote_ID, dat_vote_ID, data_year)


views<-df %>% select(ends_with("_r"), "X", "year") %>% pivot_longer(
  cols = starts_with("rcv"), # Selects all columns starting with "rcv"
  names_to = "resp_vote_ID",     # New column for the variable names
  values_to = "respondent_view"        # New column for the values
) %>% left_join(merge, by=c("resp_vote_ID"  = "resp_vote_ID", "year" = "data_year")) %>%
  filter(!is.na(respondent_view))


voter_info<-df[, c(1:15,37:47)]
colnames(voter_info)
x<-left_join(data_long, voter_info, by=c("X" = "X"))
x$dat_vote_ID <- str_extract(x$vote, "^[^_]+")
gc()
x<-left_join(x, views, by=c("X" = "X", "dat_vote_ID" = "dat_vote_ID"))
x$outcome <- ifelse(x$respondent_view==8, NA, x$outcome)

x <- subset(x, respondent_view %in% c(0,1))
getwd()
write_csv2(x, "Intermediate Data/senate_vote_level_data.csv")
